
#Code is written for R Open 3.5.1, which can be downloaded here: https://mran.microsoft.com/release-history
#The CRAN mirror snapshot was taken on 2018-08-01.  R Open 3.5.1 automatically installs packages from this snapshot.


library(data.table) #setnames function, fread
library(lavaan) #for estimating SEM's with MLE.
library(lavaan.survey) #for clustering standard errors.


options(max.print = 2000)


# load ONE of these two .csv's.
    # In the first, the "search" variables are derived with six terms: smog + no2 + air pollution + ozone + pm2.5 + AQI
    # In the second, the "search" variables are derived with five terms: smog + no2 + air pollution + ozone + pm2.5
    # The Table 1 results shown in the manuscript use the us_pollution_paper_cbsas_dfmain (google 6 terms).csv data.

# df.main <- fread("./us_pollution_paper_cbsas_dfmain (google 6 terms).csv")
df.main <- fread("./us_pollution_paper_cbsas_dfmain (google 5 terms).csv")
    

# extract relevant variables
# Any rows with NA's will be removed.
met <- df.main[, c("year", "year.fe", "memi", "name.cbsa", "name.state", "state.fe", "search.adj", "search.adj.tm1", "search.adj.tm2", "search.adj.change", "search.adj.change.tm1", 'no2', "no2.tm1", "no2.tm2", "no2.change", "no2.change.tm1", 'frv_hpv.total.change', 'frv_hpv.total', 'frv_hpv.total.tm1', 'frv_hpv.total.change.tm1', 'frv_hpv.total.tm2', 'all_action', 'all_action.change', 'all_action.change.tm1', 'all_action.tm1', 'all_action.tm2', 'no2.state.change', 'no2.state.change.tm1', 'no2.state.tm1', 'no2.state.tm2', 'frv_hpv.total.state.change', 'frv_hpv.total.state.tm1', 'all_action.state.change', 'all_action.state.tm1', 'frv_hpv.total.state.change.tm1', 'frv_hpv.total.state.tm2', 'all_action.state.change.tm1', 'all_action.state.tm2', 'vote.prop', 'grp.change', 'all_enf.change', "all_enf.change.tm1", "all_enf", "all_enf.tm1", "all_enf.tm2", "grp.percap")]




# specify year range
# including met$memi == 1 drops micropolitan areas.
met <- met[met$year < 2017 & met$year > 2006]
#met <- met[met$year < 2017 & met$year > 2006 & met$memi == 1]




#rescale variables----
met$grp.change <- met$grp.change / 10000000
met$grp.percap <- met$grp.percap /100

#check number of observations----
#met <- metros[c("")]
met <- met[complete.cases(met),] #3,017 rows for 2007-2016.
nrow(met)


#### year dummies #### ----

data <- met$year.fe
data.f <- factor(data)

#matrix of dummy variables
#1 column for each year
#1 row for each observation
dummies <- model.matrix(~data.f)



#remove 1st column of dummies matrix, which corresponds to the intercept (its value is '1' for all obs)
#also eliminates the second column to exlude it from having a fixed effect (note that one year must not have a dummy variable).
dummies <- dummies[, - c(1)]

dummies.year <- dummies #set this asside for later.  I will need the column names to insert the dummy variables into the model.

#merge dummies matrix with met df.
met <- cbind(met, dummies)

#year.dummies.str <- paste(year.dummy.names, collapse = " + ")
dummies.year.str <- colnames(dummies.year)
dummies.year.str <- paste(dummies.year.str, collapse = " + ")



#### state dummies ####----
data <- met$state.fe
data.f <- factor(data)

#matrix of dummy variables
#1 column for each province
#1 row for each observation
dummies <- model.matrix(~data.f)

#remove 1st column of dummies matrix, which corresponds to the intercept (its value is '1' for all obs)
#also eliminates the second column to exlude it from having a fixed effect (note that one state must not have a dummy variable).
dummies <- dummies[, - c(1)]

dummies.state <- dummies #set this asside for later.  I will need the column names to insert the dummy variables into the model.

#merge dummies matrix with met df.
met <- cbind(met, dummies)

dummies.state.str <- colnames(dummies.state)
dummies.state.str <- paste(dummies.state.str, collapse = " + ") #collapse all the dummies.state.str items into a single string separated by +'s.


s1 <- 'search.adj.tm1 ~
            search.adj.tm2 +
            no2.change.tm1 +
            no2.tm1 +
            all_enf.change.tm1 +
            all_enf.tm1 +
            vote.prop'
s1 <- paste0(s1, " + ", dummies.year.str)
#s1 <- paste0(s1, " + ", dummies.state.str)


g1 <- 'all_enf ~
          all_enf.tm1 +
          search.adj.tm1 +
          no2.change.tm1 +
          no2.tm1'
#g1 <- paste0(g1, " + ", dummies.year.str)
#g1<- paste0(g1, " + ", dummies.state.str)


a1 <- 'no2 ~ 
        no2.tm1 +
        grp.change +
        search.adj.tm1 +
        all_enf.change.tm1 +
        all_enf.tm1'
#a1 <- paste0(a1, " + ", dummies.year.str)
a1 <- paste0(a1, " + ", dummies.state.str)



#error correlations----
errors1 <- 'search.adj.tm1 ~~ no2'



f1 <- paste(s1, g1, a1, sep = " \n ")
f2 <- paste(s1, g1, a1, errors1, sep = " \n ")


m1 <- sem(model = f1, data = met, estimator = "ML")
m2 <- sem(model = f2, data = met, estimator = "ML")



summary(m1, fit.measures = TRUE, standardize = TRUE, rsquare = TRUE, nd = 6)
summary(m2, fit.measures = TRUE, standardize = TRUE, rsquare = TRUE, nd = 6) 

